package srst.util.PDFHandleUtil;

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;


/**
 * Static helper for extracting plain text from PDF files with Apache PDFBox.
 *
 * <p>Typical use:
 * <pre>
 * String text = PDFReaderUtil.getTextFromPDF("/path/to/file.pdf");
 * </pre>
 */
public class PDFReaderUtil {

	/**
	 * Reads all the text from a PDF file. The text is returned exactly as
	 * produced by {@link PDFTextStripper}; any further formatting is left to
	 * the caller.
	 *
	 * <p>Note: a new parser and a new text stripper are created on every call.
	 *
	 * @param pdfFilePath
	 *            path of the PDF file to read
	 * @return all text in the PDF file
	 * @throws IOException
	 *             if the file cannot be found or opened, cannot be parsed as a
	 *             PDF, or if releasing the underlying resources fails
	 */
	public static String getTextFromPDF(String pdfFilePath) throws IOException {
		RandomAccessRead source = new RandomAccessBufferedFileInputStream(pdfFilePath);
		PDDocument document = null;
		try {
			PDFParser parser = new PDFParser(source);
			parser.parse();
			document = parser.getPDDocument();
			PDFTextStripper stripper = new PDFTextStripper();
			return stripper.getText(document);
		} finally {
			if (document != null) {
				// The document was built on top of the source, so closing the
				// document is what releases the file (PDFBox 2.x behaviour).
				document.close();
			} else {
				// Parsing failed before a document existed: nothing else
				// would close the file handle, so release it here.
				source.close();
			}
		}
	}
}
